#!/usr/bin/env bash

# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Strict mode: abort on command failure, on use of unset variables and on a
# failure anywhere in a pipeline; trace every command as it is executed.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

export CI=1
echo "Running with CI=${CI:-}"

# All paths below are resolved relative to the top of the git checkout.
REPO_ROOT=$(git rev-parse --show-toplevel)
cd "${REPO_ROOT}"

# Scratch directory for build outputs and test state.
WORKDIR=${REPO_ROOT}/.build/

# Build the kops binary from this checkout and export its path as KOPS.
BINDIR=${WORKDIR}/bin
mkdir -p "${BINDIR}"
go build -o "${BINDIR}/kops" ./cmd/kops

export KOPS=${BINDIR}/kops

#######################################
# Exit handler: collect artifacts, then tear the environment down.
# Both helper scripts are best-effort; their failures are ignored so that one
# failing helper does not prevent the other from running.
# Globals:
#   REPO_ROOT    (read) top of the git checkout
#   SKIP_CLEANUP (read) when set and non-empty, the cleanup script is skipped
# Arguments:
#   None
# Returns:
#   0 always
#######################################
function cleanup() {
  local scenario_dir="${REPO_ROOT}/tests/e2e/scenarios/bare-metal"

  echo "running dump-artifacts"
  # Quoted so that a checkout path containing whitespace still resolves.
  "${scenario_dir}/dump-artifacts" || true

  if [[ -z "${SKIP_CLEANUP:-}" ]]; then
    echo "running cleanup"
    "${scenario_dir}/cleanup" || true
  fi
}

# Run the cleanup handler whenever the script exits.
trap cleanup EXIT

# Create the directory that will back our mock s3 storage.
# The :? guard aborts instead of running rm -rf on "/s3" should WORKDIR ever
# be empty.
rm -rf -- "${WORKDIR:?}/s3"
mkdir -p "${WORKDIR}/s3/"

IPV4_PREFIX=10.123.45.

VM0_IP=${IPV4_PREFIX}10
VM1_IP=${IPV4_PREFIX}11
VM2_IP=${IPV4_PREFIX}12

# Start our VMs
"${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms"

# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent.
# The agent output is captured first so that a failure to start aborts the
# script (errexit); -s forces Bourne-shell syntax regardless of $SHELL.
SSH_AGENT_ENV=$(ssh-agent -s)
eval "${SSH_AGENT_ENV}"
ssh-add "${REPO_ROOT}/.build/.ssh/id_ed25519"

. hack/dev-build-metal.sh

echo "Waiting 10 seconds for VMs to start"
sleep 10

# Remove from known-hosts in case of reuse (best-effort)
for vm_ip in "${VM0_IP}" "${VM1_IP}" "${VM2_IP}"; do
  ssh-keygen -f ~/.ssh/known_hosts -R "${vm_ip}" || true
done

# Check SSH is working and accept the host keys
for vm_ip in "${VM0_IP}" "${VM1_IP}" "${VM2_IP}"; do
  ssh -o StrictHostKeyChecking=accept-new "root@${vm_ip}" uptime
done

cd "${REPO_ROOT}"

# Enable feature flag for bare metal
export KOPS_FEATURE_FLAGS=Metal

# Set up the AWS credentials (dummy values; the endpoint is our mock s3 server)
export AWS_SECRET_ACCESS_KEY=secret
export AWS_ACCESS_KEY_ID=accesskey
export AWS_ENDPOINT_URL=http://10.123.45.1:8443
export AWS_REGION=us-east-1

# Mirror the same endpoint and credentials into the S3_* variables.
export S3_ENDPOINT=${AWS_ENDPOINT_URL}
export S3_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
export S3_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}

# Create the state-store bucket in our mock s3 server
export KOPS_STATE_STORE=s3://kops-state-store/
aws --version
# Derive the bucket URL from KOPS_STATE_STORE (minus the trailing slash) so the
# two cannot drift apart; create the bucket only if listing it fails.
STATE_STORE_BUCKET=${KOPS_STATE_STORE%/}
aws s3 ls "${STATE_STORE_BUCKET}" || aws s3 mb "${STATE_STORE_BUCKET}"

# List clusters (there should not be any yet); a failure here is ignored
"${KOPS}" get cluster || true

# Create a cluster
"${KOPS}" create cluster --cloud=metal metal.k8s.local --zones main --networking cni

# Set the IP ingress, required for metal cloud
# TODO: is this the best option?
"${KOPS}" edit cluster metal.k8s.local --set "spec.api.publicName=${VM0_IP}"

# Use latest etcd-manager image (while we're adding features)
#${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-slim:v3.0.20250628-7-ga7be11fb'

# Use 1.31 kubernetes so we get kube-apiserver fixes
export KOPS_RUN_TOO_NEW_VERSION=1
"${KOPS}" edit cluster metal.k8s.local "--set=cluster.spec.kubernetesVersion=1.31.0"

# List clusters
"${KOPS}" get cluster
"${KOPS}" get cluster -oyaml

# List instance groups
"${KOPS}" get ig --name metal.k8s.local
"${KOPS}" get ig --name metal.k8s.local -oyaml

# Apply basic configuration: first without --yes, then for real
"${KOPS}" update cluster metal.k8s.local
"${KOPS}" update cluster metal.k8s.local --yes --admin

# Enroll the control-plane VM
"${KOPS}" toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host "${VM0_IP}" --v=2

# Manual setup on the control-plane VM: etcd peer host entries first, then the
# directories that stand in for etcd "volumes".
ssh "root@${VM0_IP}" tee -a /etc/hosts <<EOF

# Hosts added for etcd discovery
${VM0_IP} node0.main.metal.k8s.local
${VM0_IP} node0.events.metal.k8s.local
EOF

# Show the resulting hosts file in the test log
ssh "root@${VM0_IP}" cat /etc/hosts

# One directory per etcd cluster (main, events), each holding a
# please-create-new-cluster marker file.
for etcd_cluster in main events; do
  etcd_volume_dir=/mnt/disks/metal.k8s.local--${etcd_cluster}--0/mnt
  ssh "root@${VM0_IP}" mkdir -p "${etcd_volume_dir}"
  ssh "root@${VM0_IP}" touch "${etcd_volume_dir}/please-create-new-cluster"
done



echo "Waiting for kube to start"
# Poll the API server: at most 60 attempts with a 10 second pause after each
# failed attempt (about 10 minutes in total).
attempt=1
until kubectl get nodes; do
  sleep 10
  if (( attempt >= 60 )); then
    break
  fi
  attempt=$(( attempt + 1 ))
done

# If the API server never came up, this call fails and errexit ends the script.
kubectl get nodes
kubectl get pods -A

# Install kindnet from its upstream manifest, give it a moment to start, then
# log the node and pod state.
KINDNET_MANIFEST_URL=https://raw.githubusercontent.com/aojea/kindnet/main/install-kindnet.yaml
kubectl create -f "${KINDNET_MANIFEST_URL}"
echo "Waiting 10 seconds for kindnet to start"
sleep 10
kubectl get nodes
kubectl get pods -A

# For host records: namespace plus the Host CRD from this checkout
kubectl create ns kops-system
kubectl apply -f "${REPO_ROOT}/k8s/crds/kops.k8s.io_hosts.yaml"

# kops-controller extra permissions: a ClusterRole allowing read access to
# kops.k8s.io hosts and patch on nodes/status, bound to the kops-controller
# service account identity.
kubectl apply --server-side -f - <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kops-controller:pki-verifier
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kops-controller:pki-verifier
subjects:
- apiGroup: rbac.authorization.k8s.io
  kind: User
  name: system:serviceaccount:kube-system:kops-controller
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kops-controller:pki-verifier
rules:
- apiGroups:
  - "kops.k8s.io"
  resources:
  - hosts
  verbs:
  - get
  - list
  - watch
# Must be able to set node addresses
# TODO: Move out?
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
EOF

#######################################
# Enroll one worker VM into the cluster's nodes-main instance group.
# First appends host entries for the control-plane endpoints to the VM's
# /etc/hosts over SSH, then runs `kops toolbox enroll` under a 10 minute
# timeout.
# Globals:
#   VM0_IP (read) address written for the control-plane endpoints
#   KOPS   (read) path to the kops binary
# Arguments:
#   $1 - IP address of the VM to enroll (required, non-empty)
# Outputs:
#   The appended hosts lines (echoed by tee) and kops output.
# Returns:
#   Non-zero if the SSH step or the enrollment fails or times out.
#######################################
function enroll_node() {
  local node_ip=${1:?enroll_node requires a node IP}

  # Manual "discovery" for control-plane endpoints
  # TODO: Replace with well-known IP
  # The here-doc body and terminator stay at column 0 so the text written to
  # /etc/hosts carries no leading indentation.
  ssh "root@${node_ip}" tee -a /etc/hosts <<EOF

# Hosts added for leader discovery
${VM0_IP} kops-controller.internal.metal.k8s.local
${VM0_IP} api.internal.metal.k8s.local
EOF

  # Quoted so a kops path containing whitespace is passed as a single word.
  timeout 10m "${KOPS}" toolbox enroll --cluster metal.k8s.local --instance-group nodes-main --host "${node_ip}" --v=2
}

# Enroll the two worker VMs
enroll_node "${VM1_IP}"
enroll_node "${VM2_IP}"

echo "Waiting 30 seconds for nodes to be ready"
sleep 30

# Log the cluster state before validation
kubectl get nodes
kubectl get nodes -o yaml

kubectl get pods -A

# Ensure the cluster passes validation
"${KOPS}" validate cluster metal.k8s.local --wait=10m

# Run a few bare-metal e2e tests
echo "running e2e tests"
cd "${REPO_ROOT}/tests/e2e/scenarios/bare-metal"
go test -v .

echo "Test successful"